
library(ggplot2)
library(stringr)
library(tidyr)
library(plyr)
library(dplyr)
library(stargazer)
library(margins)
library(reshape2)
library(MASS)
library(ggplot2)
library(Hmisc)
library(stm)
library(Jmisc)
library(fastDummies)
library(lessR)
library(broom)
library(clubSandwich)
library(cobalt)



##############################################################


# Fix the RNG seed so that randomized steps later in the script
# (e.g. the jittered rug marks in the figures) are reproducible.
set.seed(seed = 44)

# Start from a clean workspace. ls() does not list dot-prefixed objects,
# so the .Random.seed created by set.seed() above survives this call.
rm(list = ls())


# Load the session data
# The code for prepping this single data file is in fullscript_regularselection_2023_07_05.R; running lines 1-78 of that script produces the .RData file loaded here.
# The rest of this script relies on load() creating a data frame named `sessions`.
load(file = "~/Dropbox/Selection Experiments Harvard/ISQ formatting/session_prepped_data.RData")

# Loading command only used for local check on a variable name mistake ISQ replication caught
#load("/Users/stephenchaudoin/Dropbox/Selection Experiments Harvard/ISQ formatting/replication materials/session_prepped_data.RData")


##############################################################
### DATA PREP FOR ANALYSIS
##############################################################
# This section prepares the data for analysis.  It also has the code which identifies the few dropout participants.
# It also splits out the demographics information into a separate object, sessions_demog, which will be used later in the section replicating Appendix E.

#	Splitting out the demographics
#	Demographics rows go into their own object (used for Appendix E below)
sessions_demog <- subset(sessions, Stepgroup.Label == "Demographics")

#	Keep only the play data: drop the demographics rows and the junk rows
#	that have an empty Stepgroup.Label
sessions <- subset(sessions, Stepgroup.Label != "Demographics" & Stepgroup.Label != "")

#	Converting factors to numerics and characters (if necessary)
# sessions$prize <- as.numeric(levels(sessions$prize))[sessions$prize] 	### SC 1-12-21, we don't need this command with the complete cases version of the data

sessions$leader <- as.character(sessions$leader)

#	Every row of a session/part/round gets that round's prize value
#	(the maximum non-missing prize recorded within the round).
#	NOTE(review): a round whose prize is entirely NA would get -Inf here - confirm none exist.
sessions <- sessions %>%
  dplyr::group_by(sessionno, Stepgroup.Label, Stepgroup.Loop) %>%
  dplyr::mutate(prize = max(prize, na.rm = TRUE)) %>%
  as.data.frame()

###	Merging in leaders

#	Splitting out the rows that just say who the leader was for a particular round
#	Rows with an empty Participant and a non-empty Group only record who the
#	leader of that group was in a particular round
sessions_leaderids <- sessions %>%
  dplyr::filter(Participant == "") %>%
  dplyr::filter(Group != "") %>%
  dplyr::select(sessionno, Stepgroup.Label, Stepgroup.Loop, Group, leader)

#	Reshape long to wide: one row per session/part/round, one column per group
sessions_leaderids <- tidyr::spread(sessions_leaderids, key = Group, value = leader)

#	Only groups G.1 and G.2 are kept
sessions_leaderids <- sessions_leaderids %>% dplyr::select(-c(G.3:G.8))

#	Renaming columns, for merge
names(sessions_leaderids)[names(sessions_leaderids) == "G.1"] <- "leader_g1"
names(sessions_leaderids)[names(sessions_leaderids) == "G.2"] <- "leader_g2"

#	Attach each round's two leader ids to every row of that round
sessions <- merge(
  sessions,
  sessions_leaderids,
  by = c("sessionno", "Stepgroup.Label", "Stepgroup.Loop")
)

#	Merging in the candidate counts by group
#	Splitting out the rows that just say how many candidates ran for a group in a certain round
#	Rows with an empty Participant and a non-empty Group also record how many
#	candidates ran for that group in a given round
candidate_count_subset <- sessions %>%
  dplyr::filter(Participant == "") %>%
  dplyr::filter(Group != "") %>%
  dplyr::select(sessionno, Stepgroup.Label, Stepgroup.Loop, Group, candidateCount)

#	Reshape long to wide, then keep only groups G.1 and G.2
candidate_count_subset <- tidyr::spread(candidate_count_subset, key = Group, value = candidateCount)
candidate_count_subset <- candidate_count_subset %>% dplyr::select(-c(G.3:G.8))

#	Renaming columns, for merge
names(candidate_count_subset)[names(candidate_count_subset) == "G.1"] <- "candidate_count_g1"
names(candidate_count_subset)[names(candidate_count_subset) == "G.2"] <- "candidate_count_g2"

#	Attach each round's per-group candidate counts to every row of that round
sessions <- merge(
  sessions,
  candidate_count_subset,
  by = c("sessionno", "Stepgroup.Label", "Stepgroup.Loop")
)

#	Creating a variable isleader that = 1 for the two leaders each round
#	isleader = 1 for the two group leaders of a round; every LotteryExperiment
#	row also counts as a leader decision
sessions$isleader <- with(
  sessions,
  ifelse(
    Participant == leader_g1 |
      Participant == leader_g2 |
      Stepgroup.Label == "LotteryExperiment",
    1, 0
  )
)

#	Sanity check: prints any LotteryExperiment rows not coded as leader decisions
dplyr::filter(sessions, Stepgroup.Label == "LotteryExperiment", isleader == 0)

#	Recoding leader = 1 for part 2, rounds 2/7/9
sessions$isleader[
  sessions$Stepgroup.Label == "GroupExperiment" &
    sessions$Stepgroup.Loop %in% c(2, 7, 9)
] <- 1

#	Creating a variable that = 1 if the participant was _ever_ an elected leader
#	everelecleader: first flag the rows where the participant is a leader in
#	ElectionsandContest, then spread the maximum of that flag over all rows of
#	the same session-participant (1 = was an elected leader at least once)
sessions$everelecleader <- with(
  sessions,
  ifelse(isleader == 1 & Stepgroup.Label == "ElectionsandContest", 1, 0)
)
sessions <- sessions %>%
  dplyr::group_by(sessionno, Participant) %>%
  dplyr::mutate(everelecleader = max(everelecleader, na.rm = TRUE)) %>%
  as.data.frame()

#	iscandidate = 1 if the participant was a candidate in that round
sessions$iscandidate <- with(
  sessions,
  ifelse(candidate == "yes" & Stepgroup.Label == "ElectionsandContest", 1, 0)
)

#	islosingcandidate = 1 if the participant ran in that round but is not the leader
sessions$islosingcandidate <- with(
  sessions,
  ifelse(candidate == "yes" & isleader == 0 & Stepgroup.Label == "ElectionsandContest", 1, 0)
)

#	isnotcandidate = 1 if the participant did not run in that round
sessions$isnotcandidate <- with(
  sessions,
  ifelse(candidate == "no" & Stepgroup.Label == "ElectionsandContest", 1, 0)
)

# Part-specific leader indicators (leader decision AND row belongs to that part)
sessions$ispart3leader <- with(sessions, ifelse(Stepgroup.Label == "ElectionsandContest" & isleader == 1, 1, 0))
sessions$ispart2leader <- with(sessions, ifelse(Stepgroup.Label == "GroupExperiment" & isleader == 1, 1, 0))
sessions$ispart1leader <- with(sessions, ifelse(Stepgroup.Label == "LotteryExperiment" & isleader == 1, 1, 0))

#	Creating a variable that = 1 if it's a low value round
#	islvr = 1 in low value rounds (prize below 1000)
sessions$islvr <- with(sessions, ifelse(prize < 1000, 1, 0))

#	maxtickets = 1 if they bought exactly 1000 tickets
sessions$maxtickets <- with(sessions, ifelse(boughtTickets == 1000, 1, 0))

#	isoddround = 1 in ODD rounds (1, 3, ..., 11). The per-round equality tests
#	are OR-ed together so a missing round number stays NA.
sessions$isoddround <- ifelse(
  Reduce(`|`, lapply(c(1, 3, 5, 7, 9, 11), function(k) sessions$Stepgroup.Loop == k)),
  1, 0
)

### Outcome variables

#	Effort = tickets bought as a share of the prize value
sessions$effort_pctprize <- with(sessions, boughtTickets / prize)

# Running round counter across the parts: GroupExperiment rounds are shifted
# by 12, ElectionsandContest rounds by 24, all other rows keep Stepgroup.Loop
sessions$roundnumber <- with(
  sessions,
  ifelse(
    Stepgroup.Label == "ElectionsandContest", Stepgroup.Loop + 24,
    ifelse(Stepgroup.Label == "GroupExperiment", Stepgroup.Loop + 12, Stepgroup.Loop)
  )
)

# Dummy variables for each part
sessions$ispart3 <- with(sessions, ifelse(Stepgroup.Label == "ElectionsandContest", 1, 0))
sessions$ispart2 <- with(sessions, ifelse(Stepgroup.Label == "GroupExperiment", 1, 0))
sessions$ispart1 <- with(sessions, ifelse(Stepgroup.Label == "LotteryExperiment", 1, 0))

# Identifier that is unique to a session-participant, as a string ("s.<session>.<participant>") and as a factor
sessions$participantfe_string <- paste0("s.", as.character(sessions$sessionno), ".", sessions$Participant)
sessions$participantfe_factor <- factor(sessions$participantfe_string)

#	Creating a variable elected that = 1 for elected leaders who ran unopposed
#	electedunopposed = 1 when the participant is the leader of a group whose
#	candidate count for that round is exactly 1
sessions$electedunopposed <- with(
  sessions,
  ifelse(
    (Participant == leader_g1 & candidate_count_g1 == 1) |
      (Participant == leader_g2 & candidate_count_g2 == 1),
    1, 0
  )
)

# Sanity checking the number of leaders who ran unopposed
sum(sessions$electedunopposed == 1, na.rm = TRUE)
#38

# selectedrandomlydsg = 1 when the participant is the leader of a group whose
# candidate count for that round is 0 (selected randomly because no one ran in an election in part 3)
sessions$selectedrandomlydsg <- with(
  sessions,
  ifelse(
    (Participant == leader_g1 & candidate_count_g1 == 0) |
      (Participant == leader_g2 & candidate_count_g2 == 0),
    1, 0
  )
)

# Sanity checking the number of leaders in DSG who were randomly selected
sum(sessions$selectedrandomlydsg == 1, na.rm = TRUE)
#6

# Deleting blank rows
# Deleting blank rows, i.e. rows with no ticket purchase recorded.
# (Test missingness with is.na(); comparing a numeric column to the string "NA"
# only worked because the comparison itself evaluated to NA.)
sessions <- sessions[!is.na(sessions$boughtTickets), ]

# Identifying strange participants: key = session number pasted to participant id
sessions$sessionparticipant <- paste0(as.character(sessions$sessionno), sessions$Participant)
# Strange: 224P.3 (1000 or 200 only; L) 220P.4 (1000 or 999/998; L) 214P.6 (Almost always 1000; L)
# All 1000: 218P.18 (NL)
# All 160: 214P.20

# The two participants that always chose the same values
suspected_bot_ids <- c("218P.18", "214P.20")

# Keep their observations in a separate object ...
suspected_bot_observations <- sessions[sessions$sessionparticipant %in% suspected_bot_ids, ]

# ... and remove them from the analysis data.
sessions <- sessions[!(sessions$sessionparticipant %in% suspected_bot_ids), ]

# Removing one aberrant observation where boughtTickets exceeded our programmatically set maximum
sessions <- sessions %>% dplyr::filter(boughtTickets < 1001)

# Below determines which participants dropped out early (before reaching DSG) and tags them with earlydrop == 1
# 9 participants who dropped out early and didn't make it play in all three stages
# 7 dropped out after playing one round or less
# 2 dropped out during second stage (1 played 17 rounds and the other 15)

# This gets rid of participant FE factor like "s.219." which doesn't correspond to a real session-participant FE
# Participant FE levels that still have at least one row in `sessions`.
# This gets rid of levels like "s.219." which don't correspond to a real
# session-participant, and of levels whose rows were removed above.
# (Filtering with %in% is safe when every level has rows; the former
# parts[-which(is.na(parts))] idiom returns an EMPTY vector in that case.)
parts <- levels(sessions$participantfe_factor)
parts <- parts[parts %in% as.character(sessions$participantfe_factor)]

# Per-participant summaries, one element per entry of `parts`
type <- rep(NA, length(parts))         # share of the participant's rows with everelecleader == 1
indeff <- rep(NA, length(parts))       # mean effort, LotteryExperiment
groupeff <- rep(NA, length(parts))     # mean effort, GroupExperiment, leader decisions
demeff <- rep(NA, length(parts))       # mean effort, ElectionsandContest, leader decisions
hypgroupeff <- rep(NA, length(parts))  # mean effort, GroupExperiment, non-leader decisions
hypdemeff <- rep(NA, length(parts))    # mean effort, ElectionsandContest, non-leader decisions

for (i in seq_along(parts)) {
  subby <- subset(sessions, sessions$participantfe_factor == parts[i])
  type[i] <- (sum(subby$everelecleader) / nrow(subby))
  subby1 <- subset(subby, subby$Stepgroup.Label == "LotteryExperiment")
  subby2 <- subset(subby, subby$Stepgroup.Label == "GroupExperiment")
  subby3 <- subset(subby, subby$Stepgroup.Label == "ElectionsandContest")
  subby2l <- subset(subby2, subby2$isleader == 1)
  subby2h <- subset(subby2, subby2$isleader == 0)
  subby3l <- subset(subby3, subby3$isleader == 1)
  subby3h <- subset(subby3, subby3$isleader == 0)
  # mean() of an empty subset is NaN, which is what flags dropouts below
  indeff[i] <- mean(subby1$effort_pctprize, na.rm = TRUE)
  groupeff[i] <- mean(subby2l$effort_pctprize, na.rm = TRUE)
  demeff[i] <- mean(subby3l$effort_pctprize, na.rm = TRUE)
  hypgroupeff[i] <- mean(subby2h$effort_pctprize, na.rm = TRUE)
  hypdemeff[i] <- mean(subby3h$effort_pctprize, na.rm = TRUE)
}


data <- data.frame(parts, type, indeff, groupeff, hypgroupeff, demeff, hypdemeff)

# earlydrop = 1 when the participant has no non-leader effort observed in
# ElectionsandContest (hypdemeff is NaN/NA).
# NOTE(review): a participant who was the leader in every ElectionsandContest
# round they played would also be flagged by this rule - confirm none exist.
data$earlydrop <- ifelse(is.na(data$hypdemeff), 1, 0)

# Copy each participant's earlydrop flag onto all of their rows in `sessions`
# (a single vectorized lookup instead of a row-by-row loop).
sessions$earlydrop <- data$earlydrop[
  match(as.character(sessions$participantfe_factor), as.character(data$parts))
]

## 9 participants who dropped out early and didn't make it play in all three stages
## 7 dropped out after playing one round 
## 2 dropped out during second stage (1 played 17 rounds and the other 15)
# Row counts per participant FE level among the rows tagged as early dropouts
summary(sessions$participantfe_factor[which(sessions$earlydrop == 1)])

# Num dropped out
length(unique(sessions$sessionparticipant[which(sessions$earlydrop == 1)]))

# Num total
dplyr::n_distinct(sessions$sessionparticipant)

# Remove the observations from participants who dropped out early
sessions <- dplyr::filter(sessions, earlydrop == 0)
# The separately stored bot observations get earlydrop = 0
suspected_bot_observations[["earlydrop"]] <- 0

# Create a factor variable to categorize the prize amount levels
# Map a vector of prize amounts to a numeric payoff-level code (1-6):
#   1: prize < 1000          2: 1000 < prize < 1500   3: 1500 < prize < 2000
#   4: 2000 < prize < 2400   5: 2400 < prize < 2500   6: everything else
# All comparisons are strict, so a prize exactly equal to 1000, 1500, 2000 or
# 2400 falls through to level 6, as does any prize of 2500 or more.
# NOTE(review): confirm no prize sits exactly on one of those boundaries.
prize_to_payofflevel <- function(prize) {
  ifelse(prize < 1000, 1,
    ifelse(prize > 1000 & prize < 1500, 2,
      ifelse(prize > 1500 & prize < 2000, 3,
        ifelse(prize > 2000 & prize < 2400, 4,
          ifelse(prize > 2400 & prize < 2500, 5, 6)
        )
      )
    )
  )
}

# Categorize the prize amount levels, for the analysis data and for the
# separately stored bot observations (same rule for both).
sessions$payofflevel <- prize_to_payofflevel(sessions$prize)
suspected_bot_observations$payofflevel <- prize_to_payofflevel(suspected_bot_observations$prize)

















############################################################
## Demographics: Code for Appendix Table E.1 and E.2
############################################################
# The object sessions has demographic data and the data about participants' play.  Here, we split out the demographic data and replicate the summary statistical information
#   from appendix E.  The next section finishes the data prep on the data that is about participants' play and then replicates analysis of their play.

# Same session-participant key as in `sessions` (session number pasted to participant id)
sessions_demog$sessionparticipant <- paste0(as.character(sessions_demog$sessionno), sessions_demog$Participant)

# Going through as.character() keeps the conversion correct even if age was
# stored as a factor (as.numeric() on a factor returns the level codes).
sessions_demog$age <- as.numeric(as.character(sessions_demog$age))

## Binary demog variable of race == white.
## Built with ifelse() like the other indicators below: a missing race stays NA
## instead of raising an error, and a category with no matches no longer fails.
sessions_demog$white <- ifelse(sessions_demog$race == "white", 1, 0)

## Binary demog variable of gender == female
sessions_demog$female <- ifelse(sessions_demog$gender == "f", 1, 0)

## Education indicators
sessions_demog$nocollege <- ifelse(sessions_demog$education == "highSchool" | sessions_demog$education == "twelfth", 1, 0)
sessions_demog$somecollege <- ifelse(sessions_demog$education == "collegeNoDegree", 1, 0)
sessions_demog$collegedegree <- ifelse(sessions_demog$education == "associateDegree" | sessions_demog$education == "bachelor", 1, 0)
sessions_demog$postgraduate <- ifelse(sessions_demog$education == "master" | sessions_demog$education == "professional", 1, 0)

## Income indicators. Each one is cumulative: it equals 1 when the reported
## bracket is the named top bracket or any bracket listed before it.
income_brackets <- c(
  "to9999", "to12499", "to14999", "to19999", "to24999", "to29999",
  "to34999", "to39999", "to49999", "to59999", "to74999", "to84999",
  "to99999", "to124999", "to149999", "to174999", "moreThan175000"
)

# 1 if sessions_demog$income equals any bracket up to and including
# `top_bracket`, 0 if it equals none of them, NA if income is missing.
income_at_or_below <- function(top_bracket) {
  included <- income_brackets[seq_len(match(top_bracket, income_brackets))]
  hit <- Reduce(`|`, lapply(included, function(b) sessions_demog$income == b))
  ifelse(hit, 1, 0)
}

sessions_demog$income_0_10k <- income_at_or_below("to9999")
sessions_demog$income_10_20k_or_less <- income_at_or_below("to19999")
sessions_demog$income_20_30k_or_less <- income_at_or_below("to29999")
sessions_demog$income_30_40k_or_less <- income_at_or_below("to39999")
sessions_demog$income_40_50k_or_less <- income_at_or_below("to49999")
sessions_demog$income_50_60k_or_less <- income_at_or_below("to59999")
# NOTE(review): despite its name this flag covers EVERY listed bracket, so it
# is 1 for any respondent whose income matches one of them - confirm intent.
sessions_demog$income_60k_plus <- income_at_or_below("moreThan175000")


# Only the key and the derived demographic variables are merged into the play data
minimal_sessions_demog <- sessions_demog %>% dplyr::select(sessionparticipant, age, nocollege, somecollege, collegedegree,postgraduate , white, income_0_10k,income_10_20k_or_less,  
                                                    income_20_30k_or_less, income_30_40k_or_less,income_40_50k_or_less,
                                                    income_50_60k_or_less,income_60k_plus, female)



#sessions <- sessions %>% select(-age)
# Left join: every play observation keeps its row; demographics are attached by
# session-participant key (NA where no demographics row exists).
sessions_w_demog <- merge(x = sessions, y = minimal_sessions_demog, by = "sessionparticipant", all.x = TRUE)



# --- Table E.1: candidates vs. non-candidates, ElectionsandContest rows only ---
part3_sessions <- sessions_w_demog %>% dplyr::filter(Stepgroup.Label == "ElectionsandContest")
# Recode candidate to the 0/1 iscandidate indicator and use it as the treatment.
# (Direct assignment replaces df[cond, ]$x <- value, which errors when cond
# contains NA or selects no rows.)
part3_sessions$candidate <- part3_sessions$iscandidate
part3_sessions$treat <- part3_sessions$candidate

# merge() suffixes non-key columns present in both inputs with .x/.y; "age.y"
# is the demographics-side age (presumably `sessions` carries its own age column).
names(part3_sessions)[names(part3_sessions) == "age.y"] <- "age"

#Table E.1
bal.tab(treat ~ age + female + white + income_20_30k_or_less + nocollege , data = part3_sessions, disp = c("means", "sds"), binary = "std", stats = "mean.diffs",thresholds = c(m = .1, v = 2))

# --- Table E.2: leaders vs. non-leaders, ElectionsandContest rows only ---
part3_sessions <- sessions_w_demog %>% dplyr::filter(Stepgroup.Label == "ElectionsandContest")
# Recode leader to the 0/1 isleader indicator and use it as the treatment
part3_sessions$leader <- part3_sessions$isleader
part3_sessions$treat <- part3_sessions$leader

names(part3_sessions)[names(part3_sessions) == "age.y"] <- "age"

#Table E.2
bal.tab(treat ~ age + female + white + income_20_30k_or_less + nocollege , data = part3_sessions, disp = c("means", "sds"), binary = "std", stats = "mean.diffs", thresholds = c(m = .1, v = 2))











#########################################################################
### Summary values for tickets purchased over all parts of the game.
#     In text: "As in other contest games, players generally purchased more tickets than the Nash
#       equilibrium amount. Over all parts of the game, the average purchase was 0.36 - meaning
#       the tickets purchased equalled 36% of the prize value - with a median of 0.34. These
#       numbers are similar in magnitude to those found in related experiments.
#########################################################################

# Distribution of effort (tickets / prize) over all leader decisions (isleader == 1)
with(subset(sessions, isleader == 1), summary(effort_pctprize))





#########################################################################
### Section 4
#########################################################################

### Summary numbers in text around Figure 3 and Table 1

#Mean effort level for ICG
# All numbers below use leader decisions (isleader == 1) in rounds with prize > 1000.

# Effort summary for ICG (LotteryExperiment)
with(subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment" & prize > 1000), summary(effort_pctprize))
# N for ICG
nrow(subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment" & prize > 1000))
# Effort summary for DSG (ElectionsandContest)
with(subset(sessions, isleader == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000), summary(effort_pctprize))
# N for DSG
nrow(subset(sessions, isleader == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000))

# Effort summary for RSG (GroupExperiment)
with(subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment" & prize > 1000), summary(effort_pctprize))
# N for RSG
nrow(subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment" & prize > 1000))
# Summary of the number of tickets bought for RSG
with(subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment" & prize > 1000), summary(boughtTickets))


#### Figure 3 top and bottom plots, Table 1

# Density plot of effort by section (DSG, RSG, ICG). Hypothetical decisions and low value rounds are excluded.
# Explicit mapping from Stepgroup.Label values to legend labels and colours,
# so the DSG/RSG/ICG labels do not depend on the implicit (alphabetical)
# ordering of the levels.
stage_breaks <- c("ElectionsandContest", "GroupExperiment", "LotteryExperiment")
stage_labels <- c("DSG", "RSG", "ICG")
stage_colors <- c(ElectionsandContest = "black", GroupExperiment = "grey55", LotteryExperiment = "grey79")

# Mean effort per stage; leader decisions only, low value rounds excluded.
# summarise is namespace-qualified (as in the boughtTickets version below)
# because plyr and dplyr are both attached.
mean_isleader_bypart <- ddply(subset(sessions, isleader == 1 & prize > 1000 & !(is.na(effort_pctprize))), "Stepgroup.Label", dplyr::summarise, grp.mean=mean(effort_pctprize))

### Figure 3: Effort as Percent of Prize by Section (top pane)
# Density per stage, rug marks for the individual decisions, dashed line at each stage mean
ggplot(data = subset(sessions, isleader == 1 & prize > 1000), aes(x=effort_pctprize, color= Stepgroup.Label)) +
  geom_density() +
  geom_rug(aes(x=effort_pctprize, color= Stepgroup.Label, y = 0), position = position_jitter(height = 0)) +
  geom_vline(data = mean_isleader_bypart, aes(xintercept=grp.mean, color= Stepgroup.Label), linetype = "dashed") +
  labs(color='') + scale_color_manual(breaks = stage_breaks, labels = stage_labels, values = stage_colors)  + xlab("Effort as Percent of Prize") + ylab("Density")

#ggsave("../../Drafts and Presentations/greyscale_plots/effort_leaders_bypart123_no_lvr_gray.pdf")

# Number of leader decision (round) observations per stage
rounds_per_stage_df <- subset(sessions, isleader == 1 & prize > 1000 & !(is.na(effort_pctprize))) %>% group_by(Stepgroup.Label) %>% dplyr::summarise(count = n())

# Number of times participant bought 1000 tickets per stage
max_ticket_purchase_per_stage_df <- subset(sessions, isleader == 1 & prize > 1000 & !(is.na(effort_pctprize))) %>% group_by(Stepgroup.Label) %>% dplyr::filter(boughtTickets == 1000) %>% dplyr::summarise(count = n())

# Truncation because of the 1000 endowment?
#	How many more times did they pick 1000 in the DSG versus the other sections?
#	count.x = all leader decisions in the stage, count.y = those with exactly 1000 tickets
perc_max_ticketpurchase <- merge(rounds_per_stage_df, max_ticket_purchase_per_stage_df, by = "Stepgroup.Label") %>% dplyr::mutate(perc_max_purchase  = 100*(count.y/count.x))

# Mean boughtTickets per stage; leader decisions only, low value rounds excluded.
mean_boughttickets_bypart <- ddply(subset(sessions, isleader == 1 & prize > 1000 & !(is.na(boughtTickets))), "Stepgroup.Label", dplyr::summarise, grp.mean=mean(boughtTickets))

### Figure 3: Effort as Number of Tickets Bought by Section (bottom pane)
ggplot(data = subset(sessions, isleader == 1 & prize > 1000), aes(x=boughtTickets, color= Stepgroup.Label)) +
  geom_density() +
  geom_rug(aes(x=boughtTickets, color= Stepgroup.Label, y = 0), position = position_jitter(height = 0)) +
  geom_vline(data = mean_boughttickets_bypart, aes(xintercept=grp.mean, color= Stepgroup.Label), linetype = "dashed") +
  labs(color='') + scale_color_manual(breaks = stage_breaks, labels = stage_labels, values = stage_colors) + xlab("Effort as Number of Tickets Bought") + ylab("Density")

#ggsave("../Drafts and Presentations/boughttickets_leaders_bypart123_gray.pdf")


# Regression, effort levels, compared to the ICG & remove hypotheticals from the sample, excludes low value rounds
# OLS of effort (tickets / prize) on the part-3 and part-2 dummies.
# Sample: leader decisions (isleader == 1) in rounds with prize > 1000, so
# part 1 (LotteryExperiment) is the omitted reference category.
test1 <- lm(effort_pctprize ~ ispart3 + ispart2 , data = subset(sessions,isleader == 1 &  prize > 1000 ))


### Table 1. 
# Text rendering of the regression; the covariate labels follow the order of
# the regressors: ispart3 -> "DSG", ispart2 -> "RSG".
stargazer(test1, type = "text",
          title = "Effort by Decision Type",
          covariate.labels = c("DSG","RSG"),
          dep.var.labels = "Effort as Percent of the Prize")

## Summary numbers of NMV

# "The average estimated non-monetary value to winning among..."
# Non-monetary value measures, all derived from tickets bought and the prize:
#   nmv              = 4 * tickets - prize
#   nmv_pct_prize    = nmv as a share of the prize
#   nmv_tix_above_ne = tickets - prize / 4
sessions$nmv <- with(sessions, 4 * boughtTickets - prize)

sessions$nmv_pct_prize <- with(sessions, nmv / prize)
sessions$nmv_tix_above_ne <- with(sessions, boughtTickets - (prize / 4))

# nmv by stage; leader decisions, prize > 1000
with(subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment" & prize > 1000), summary(nmv))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment" & prize > 1000), summary(nmv))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000), summary(nmv))

# nmv as a share of the prize by stage; same sample
with(subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment" & prize > 1000), summary(nmv_pct_prize))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment" & prize > 1000), summary(nmv_pct_prize))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000), summary(nmv_pct_prize))


## Summary numbers for the gap between leaders and non-leaders
#   "The increased ticket purchases in the DSG versus the RSG also widened the gap..."

# RSG gap = approx 11 tickets
# Tickets bought by leaders (isleader == 1) vs. non-leaders (isleader == 0), prize > 1000
# RSG gap = approx 11 tickets
with(subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment" & prize > 1000), summary(boughtTickets))
with(subset(sessions, isleader == 0 & Stepgroup.Label == "GroupExperiment" & prize > 1000), summary(boughtTickets))
# DSG gap = approx 76 tickets
with(subset(sessions, isleader == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000), summary(boughtTickets))
with(subset(sessions, isleader == 0 & Stepgroup.Label == "ElectionsandContest" & prize > 1000), summary(boughtTickets))


## Summary numbers for the gap between elected leaders and losing candidates
#   "To show this, we compare the hypothetical decisions of those who did not run with those who ran and lost and the leadership decisions of those who ran and won...."

# Effort of leaders in ElectionsandContest ...
with(subset(sessions, isleader == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000), summary(effort_pctprize))
# ... versus effort of candidates who are not the leader
with(subset(sessions, isleader == 0 & iscandidate == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000), summary(effort_pctprize))

# OLS of effort on isleader among candidates only (iscandidate == 1) in
# ElectionsandContest rounds with prize > 1000: the isleader coefficient is the
# difference in mean effort between leaders and candidates who are not leaders.
losingcandidates <- lm(effort_pctprize ~ isleader, data = subset(sessions, Stepgroup.Label == "ElectionsandContest" & prize > 1000 & iscandidate == 1))
summary(losingcandidates)	






#########################################################################
### Section 5: The Election Effect Among Leaders
#########################################################################

# Summary values for some of the text around Table 2
# How many people became elected leaders?
# One row per participant who was a leader in ElectionsandContest at least
# once; `wins` = number of ElectionsandContest rows in which they are the leader
elected_number_times <- sessions %>%
  dplyr::filter(everelecleader == 1, Stepgroup.Label == "ElectionsandContest", isleader == 1) %>%
  dplyr::group_by(participantfe_factor) %>%
  dplyr::summarise(wins = n())
total_participants_elected_leaders <- nrow(elected_number_times)
num_participants_won_just_once <- nrow(dplyr::filter(elected_number_times, wins == 1))
num_participants_won_six_or_more <- nrow(dplyr::filter(elected_number_times, wins >= 6))


### Table 2 regressions
###	Y = B_0 + B_1*P3L + B_2*P2L	(sample limited to only eventual P3 leaders)
# Covariates are for "is this Part 3" and "is this Part 2" 
# Regression excluding the low value rounds
# Model 1: OLS of effort on the part-3 and part-2 dummies.
# Sample: leader decisions (isleader == 1) of participants with
# everelecleader == 1, in rounds with prize > 1000; part 1 is the reference.
p3effect_eventualleaders <- lm(effort_pctprize ~ ispart3 + ispart2, data = subset(sessions, isleader == 1 & everelecleader == 1 & prize > 1000))
summary(p3effect_eventualleaders)	


### Y = B_0 + B_1*P3L + B_2*P2L + Participant FE (sample limited to only eventual P3 leaders)
# Model 2: same sample and regressors, plus one dummy per session-participant
# (participantfe_factor) as participant fixed effects.
p3effect_eventualleaders_partfe_nolvr <- lm(effort_pctprize ~ ispart3 + ispart2 + participantfe_factor, data = subset(sessions, isleader == 1 & prize > 1000 & everelecleader == 1))
summary(p3effect_eventualleaders_partfe_nolvr)	


### Table 2. Main Regression Table for Election Effect Section
# Both models side by side; the participant fixed-effect coefficients are
# omitted from the table and reported via the "Participant FE" line instead.
stargazer(p3effect_eventualleaders,p3effect_eventualleaders_partfe_nolvr, type = "text",
          title = "Effort by section, only eventual leaders",
          covariate.labels = c("Decision in the DSG", "Decision in RSG"),
          dep.var.labels = "Effort",
          omit = "participantfe_factor",
          add.lines = list(c("Participant FE", "No", "Yes")))



## Summary text after Table 2, about NMVs
#   "Eventual leaders assign, on average, a non-monetary value to winning equivalent to 652 tickets, which corresponds to 33% of the prize value,..."

# Sample for all six summaries: leader decisions of participants with
# everelecleader == 1, in rounds with prize > 1000.

# nmv by stage
with(subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment" & prize > 1000 & everelecleader == 1), summary(nmv))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment" & prize > 1000 & everelecleader == 1), summary(nmv))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000 & everelecleader == 1), summary(nmv))

# nmv as a share of the prize by stage
with(subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment" & prize > 1000 & everelecleader == 1), summary(nmv_pct_prize))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment" & prize > 1000 & everelecleader == 1), summary(nmv_pct_prize))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000 & everelecleader == 1), summary(nmv_pct_prize))






############################################################
## Paper Section 6: Decomposition Analysis
############################################################

## Values for Table 3: summary values used in decomposition analyses
# Effort summaries by stage and decision type, rounds with prize > 1000.
# Participants with everelecleader == 1:
with(subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment" & everelecleader == 1 & prize > 1000), summary(effort_pctprize))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment" & everelecleader == 1 & prize > 1000), summary(effort_pctprize))
with(subset(sessions, isleader == 0 & Stepgroup.Label == "GroupExperiment" & everelecleader == 1 & prize > 1000), summary(effort_pctprize))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "ElectionsandContest" & everelecleader == 1 & prize > 1000), summary(effort_pctprize))
with(subset(sessions, isleader == 0 & Stepgroup.Label == "ElectionsandContest" & everelecleader == 1 & prize > 1000), summary(effort_pctprize))
# Participants with everelecleader == 0:
with(subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment" & everelecleader == 0 & prize > 1000), summary(effort_pctprize))
with(subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment" & everelecleader == 0 & prize > 1000), summary(effort_pctprize))
with(subset(sessions, isleader == 0 & Stepgroup.Label == "GroupExperiment" & everelecleader == 0 & prize > 1000), summary(effort_pctprize))
with(subset(sessions, isleader == 0 & Stepgroup.Label == "ElectionsandContest" & everelecleader == 0 & prize > 1000), summary(effort_pctprize))
# All participants, LotteryExperiment leader decisions:
with(subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment" & prize > 1000), summary(effort_pctprize))

# How many people became elected leaders?
# One row per participantfe_factor value among rows with isleader == 1 and
# everelecleader == 1 in "ElectionsandContest"; `wins` is the number of such
# rows for that participant.
elected_number_times <- sessions %>%
  filter(
    Stepgroup.Label == "ElectionsandContest",
    isleader == 1,
    everelecleader == 1
  ) %>%
  group_by(participantfe_factor) %>%
  dplyr::summarise(wins = n())
total_participants_elected_leaders <- nrow(elected_number_times)

num_eventual_leaders <- total_participants_elected_leaders

# Number of distinct participantfe_factor values with everelecleader == 0 that
# appear in "ElectionsandContest" rows.
num_never_leaders <- nrow(
  sessions %>%
    filter(Stepgroup.Label == "ElectionsandContest", everelecleader == 0) %>%
    group_by(participantfe_factor) %>%
    dplyr::summarise(wins = n())
)

## TABLE 3. Quantities used for decomposition of selection and election effects

# Mean of effort_pctprize over the rows of `sessions` that belong to one
# Stepgroup.Label (`part`) and have prize > 1000. When `leader` and/or
# `ever_leader` are supplied, the rows must additionally have isleader /
# everelecleader equal to that value. which() excludes rows whose condition is
# NA, and na.rm = TRUE drops missing efforts, so an empty cell yields NaN.
table3_mean_effort <- function(part, leader = NULL, ever_leader = NULL) {
  keep <- sessions$Stepgroup.Label == part & sessions$prize > 1000
  if (!is.null(leader)) {
    keep <- keep & sessions$isleader == leader
  }
  if (!is.null(ever_leader)) {
    keep <- keep & sessions$everelecleader == ever_leader
  }
  mean(sessions$effort_pctprize[which(keep)], na.rm = TRUE)
}

# Mean effort in ICG (needed to calculate the overall effect)
e_icg <- table3_mean_effort("LotteryExperiment")
## 0.3311892

prop_eventual_leaders <- num_eventual_leaders / (num_eventual_leaders + num_never_leaders)
##0.53

# Decision of eventual dem. leader in ICG
e_l_icg <- table3_mean_effort("LotteryExperiment", leader = 1, ever_leader = 1)
## 0.3474502
e_l_icg_rounded <- round(e_l_icg, 4)

# Leadership decision of eventual dem. leader in RSG
e_l_rsg <- table3_mean_effort("GroupExperiment", leader = 1, ever_leader = 1)
## 0.3578113
e_l_rsg_rounded <- round(e_l_rsg, 4)

# Leadership decision of eventual dem. leader in DSG
e_l_dsg <- table3_mean_effort("ElectionsandContest", leader = 1, ever_leader = 1)
## 0.3880442
e_l_dsg_rounded <- round(e_l_dsg, 4)

# Decision of never dem. leader in ICG
e_nl_icg <- table3_mean_effort("LotteryExperiment", leader = 1, ever_leader = 0)
## 0.3130959
e_nl_icg_rounded <- round(e_nl_icg, 4)

# Leadership decision of never dem. leader in RSG
e_nl_rsg <- table3_mean_effort("GroupExperiment", leader = 1, ever_leader = 0)
## 0.3495445
e_nl_rsg_rounded <- round(e_nl_rsg, 4)

# Leadership decision of never dem. leader in DSG
e_nl_dsg <- table3_mean_effort("ElectionsandContest", leader = 1, ever_leader = 0)
## NaN (Because this is unobserved counterfactual)

# Hypothetical decision of eventual dem. leader in RSG
he_l_rsg <- table3_mean_effort("GroupExperiment", leader = 0, ever_leader = 1)
## 0.3425559
he_l_rsg_rounded <- round(he_l_rsg, 4)

# Hypothetical decision of eventual dem. leader in DSG
he_l_dsg <- table3_mean_effort("ElectionsandContest", leader = 0, ever_leader = 1)
## 0.3566984
he_l_dsg_rounded <- round(he_l_dsg, 4)

# Hypothetical decision of never dem. leader in RSG
he_nl_rsg <- table3_mean_effort("GroupExperiment", leader = 0, ever_leader = 0)
## 0.3067988
he_nl_rsg_rounded <- round(he_nl_rsg, 4)

# Hypothetical decision of never dem. leader in DSG
he_nl_dsg <- table3_mean_effort("ElectionsandContest", leader = 0, ever_leader = 0)
## 0.3261078
he_nl_dsg_rounded <- round(he_nl_dsg, 4)


# Overall effect: mean DSG effort of eventual leaders minus mean ICG effort.
e_l_dsg - e_icg
## 0.056855


# If we assume the overall effect is driven entirely by the selection effect,
# the implied never-leader DSG effort is computed below.
# NOTE: this line uses hard-coded rounded inputs (0.3880, 0.0569, 0.53) rather
# than e_l_dsg, the overall effect and prop_eventual_leaders. The resulting
# e_nl_dsg is not read by scenarios e1-e3, which set e_nl_dsg_rounded directly,
# and it is overwritten in e4.
e_nl_dsg <- 0.3880-(0.0569/(1-0.53))
## 0.2669362


# Each scenario below plugs a different value for the unobserved never-leader
# DSG decision (e_nl_dsg_rounded) into the same two formulas and overwrites
# selection_effect and election_effect, so statement order matters.

## e1: NL not affected by election treatment assumption
## e_nl_dsg == e_nl_icg

e_nl_dsg_rounded <- e_nl_icg_rounded
# 0.3131
selection_effect <- (1-prop_eventual_leaders)*(e_l_dsg_rounded - e_nl_dsg_rounded)
# 0.03545267
election_effect <- (prop_eventual_leaders)*(e_l_dsg_rounded - e_l_icg_rounded) + (1-prop_eventual_leaders)*(e_nl_dsg_rounded - e_nl_icg_rounded)
# 0.02133

# Share of the total (selection + election) attributed to the election effect
election_effect/(selection_effect + election_effect)
# 0.3757 ~ 38% of overall effect


## e2: Take NL's hypothetical DSG decision at face value
## e_nl_dsg == he_nl_dsg
e_nl_dsg_rounded <- he_nl_dsg_rounded
# 0.3261

selection_effect <- (1-prop_eventual_leaders)*(e_l_dsg_rounded - e_nl_dsg_rounded)
# 0.02929933
election_effect <- (prop_eventual_leaders)*(e_l_dsg_rounded - e_l_icg_rounded) + (1-prop_eventual_leaders)*(e_nl_dsg_rounded - e_nl_icg_rounded)
# 0.02748333

election_effect/(selection_effect + election_effect)
# 0.4840092 ~ 48% of overall effect


### e3: Decomposition Using Hypothetical Efforts
## e_nl_dsg_rounded == he_nl_dsg_rounded + e_nl_rsg_rounded - he_nl_rsg_rounded
## i.e. the hypothetical DSG decision shifted by the never-leaders' RSG gap
## between actual-leader and hypothetical decisions.

e_nl_dsg_rounded <- he_nl_dsg_rounded + e_nl_rsg_rounded - he_nl_rsg_rounded
# 0.3688

selection_effect <- (1-prop_eventual_leaders)*(e_l_dsg_rounded - e_nl_dsg_rounded)
# 0.009088
election_effect <- (prop_eventual_leaders)*(e_l_dsg_rounded - e_l_icg_rounded) + (1-prop_eventual_leaders)*(e_nl_dsg_rounded - e_nl_icg_rounded)
# 0.048

election_effect/(selection_effect + election_effect)
# 0.8399512 ~ 84% of overall effect


### e4: Decomposition Using Difference in two differences
# alpha scales the never-leaders' RSG gap by the ratio of the eventual leaders'
# DSG gap to their RSG gap (actual minus hypothetical decision in each case).
alpha <- (e_l_dsg_rounded - he_l_dsg_rounded)/(e_l_rsg_rounded - he_l_rsg_rounded)
e_nl_dsg <- he_nl_dsg_rounded +alpha*(e_nl_rsg_rounded - he_nl_rsg_rounded)
e_nl_dsg_rounded <- round(e_nl_dsg, 4)
#0.414

selection_effect <- (1-prop_eventual_leaders)*(e_l_dsg_rounded - e_nl_dsg_rounded)
# -0.01230667
election_effect <- (prop_eventual_leaders)*(e_l_dsg_rounded - e_l_icg_rounded) + (1-prop_eventual_leaders)*(e_nl_dsg_rounded - e_nl_icg_rounded)
# 0.0690893

election_effect/(selection_effect + election_effect)
# 1.216733


### Decomposition Using Group-leader Effects
# (Another decomposition that we didn't reference in the main text or appendix)
# beta is the eventual leaders' DSG-minus-RSG change relative to their
# RSG-minus-ICG change; it is applied to the never-leaders' RSG-minus-ICG change.
beta <- (e_l_dsg_rounded - e_l_rsg_rounded) / (e_l_rsg_rounded - e_l_icg_rounded)
beta_rounded <- round(beta, 4)
# 2.932

e_nl_dsg<- beta_rounded* (e_nl_rsg_rounded - e_nl_icg_rounded) + e_nl_rsg_rounded
e_nl_dsg_rounded <- round(e_nl_dsg, 4)
# 0.4562

selection_effect <- (1-prop_eventual_leaders)*(e_l_dsg_rounded - e_nl_dsg_rounded)
# -0.03228133
election_effect <- (prop_eventual_leaders)*(e_l_dsg_rounded - e_l_icg_rounded) + (1-prop_eventual_leaders)*(e_nl_dsg_rounded - e_nl_icg_rounded)
# 0.089064

## Figure 4
# Plots the selection and election effects as functions of the (unobserved)
# leadership decision of never leaders in the DSG, and marks the four assumed
# values e1-e4 with dotted vertical lines. Written to "Fig_Decompositions.pdf".

## Equations:
# x is the candidate value for the never-leader DSG decision. The two curves
# use the same formulas as the decomposition scenarios, but with the unrounded
# means e_l_dsg, e_l_icg and e_nl_icg.
x <- seq(0.25, 0.5, 0.001)
y_selection <- (1 - prop_eventual_leaders) * (e_l_dsg - x)
y_election <- prop_eventual_leaders * (e_l_dsg - e_l_icg) + (1 - prop_eventual_leaders) * (x - e_nl_icg)

pdf(file = "Fig_Decompositions.pdf")
# ylim is passed as a named argument. The earlier `ylim<-c(...)` form was an
# assignment whose value only reached the ylim parameter by positional
# matching, and it left a stray global `ylim` behind.
plot(x, y_selection,
     type = "l",
     ylim = c(min(y_selection), max(y_election)),
     xlim = c(0.25, 0.5),
     col = "black",
     ylab = "Effect Size",
     xlab = "Leadership Decision of Never Leaders in DSG",
     lwd = 2, lty = 1)
lines(x, y_election, col = "black", lwd = 2, lty = 2)
# Horizontal reference line at an effect size of zero
lines(c(-0.5, 1.5), c(0, 0))

#zero election effect among never leaders
e1 <- e_nl_icg_rounded
lines(c(e1, e1), c(-0.5, 0.5), lty = 3)
#hypothetical effort
e2 <- he_nl_dsg_rounded
lines(c(e2, e2), c(-0.5, 0.5), lty = 3)
#proportional hypothetical effort (only nl)
e3 <- he_nl_dsg_rounded + e_nl_rsg_rounded - he_nl_rsg_rounded
lines(c(e3, e3), c(-0.5, 0.5), lty = 3)
#proportional hypothetical effort (using l too)
e4 <- he_nl_dsg_rounded + ((e_l_dsg_rounded - he_l_dsg_rounded) / (e_l_rsg_rounded - he_l_rsg_rounded)) * (e_nl_rsg_rounded - he_nl_rsg_rounded)
e4 <- round(e4, 4)

lines(c(e4, e4), c(-0.5, 0.5), lty = 3)


#zero election effect
#e1<- e_nl_icg-(prop_eventual_leaders/(1-prop_eventual_leaders))*(e_l_dsg -e_l_icg)
#election effect  = selection effect
#e2<- ((e_l_dsg+e_nl_icg)/2) - (prop_eventual_leaders/(2*(1-prop_eventual_leaders)))*(e_l_dsg - e_l_icg)

legend(0.43, 0.03, c("Election", "Selection"), lty = c(2, 1), lwd = c(2, 2))
# Labels sit slightly to the left of each dotted line
text(e1 - 0.005, 0.1, "e1")
text(e2 - 0.005, 0.1, "e2")
text(e3 - 0.005, 0.1, "e3")
text(e4 - 0.005, 0.1, "e4")
dev.off()


# Election-effect share of (election + selection) at each of the four assumed
# never-leader DSG decisions e1-e4, using the rounded Table 3 quantities.

# Terms that do not depend on the assumed value x
never_leader_weight <- 1 - prop_eventual_leaders
leader_election_term <- prop_eventual_leaders * (e_l_dsg_rounded - e_l_icg_rounded)

# e1
x <- e1
y_election_e1 <- leader_election_term + never_leader_weight * (x - e_nl_icg_rounded)
y_selection_e1 <- never_leader_weight * (e_l_dsg_rounded - x)
e_1_election_effect_pct <- y_election_e1 / (y_election_e1 + y_selection_e1)

# e2
x <- e2
y_election_e2 <- leader_election_term + never_leader_weight * (x - e_nl_icg_rounded)
y_selection_e2 <- never_leader_weight * (e_l_dsg_rounded - x)
e_2_election_effect_pct <- y_election_e2 / (y_election_e2 + y_selection_e2)

# e3
x <- e3
y_election_e3 <- leader_election_term + never_leader_weight * (x - e_nl_icg_rounded)
y_selection_e3 <- never_leader_weight * (e_l_dsg_rounded - x)
e_3_election_effect_pct <- y_election_e3 / (y_election_e3 + y_selection_e3)

# e4
x <- e4
y_election_e4 <- leader_election_term + never_leader_weight * (x - e_nl_icg_rounded)
y_selection_e4 <- never_leader_weight * (e_l_dsg_rounded - x)
e_4_election_effect_pct <- y_election_e4 / (y_election_e4 + y_selection_e4)





#################################################################################
#####  Appendix B: Robustness of Effect of Dem. Selection
#################################################################################


# Appendix B bullet point 1: main effect holds even when including low value rounds and adding the observations from suspected two bots
# Regression, effort levels, compared to the ICG & remove hypotheticals from the sample, includes low value rounds
# (no prize filter is applied; only isleader == 1 rows are kept).

# Matching the names of all the columns, deleting some of the NMV stuff from above
sessionsshort <- subset(
  sessions,
  select = -c(age, nmv, nmv_pct_prize, nmv_tix_above_ne)
)
# NOTE: this overwrites suspected_bot_observations without its age column, so
# re-running this statement in the same session fails on the second pass.
suspected_bot_observations <- subset(suspected_bot_observations, select = -age)
sessions_w_bots <- rbind(sessionsshort, suspected_bot_observations)

test2.5 <- lm(
  effort_pctprize ~ ispart3 + ispart2,
  data = subset(sessions_w_bots, isleader == 1)
)

stargazer(
  test2.5,
  type = "text",
  title = "Effort by Decision Type (includes low value rounds)",
  covariate.labels = c("DSG", "RSG"),
  dep.var.labels = "Effort as Percent of the Prize"
)

# Appendix B bullet point 2 : main effect comparing DSG and RSG
# Sample: isleader == 1 rows with prize > 1000, excluding "LotteryExperiment".
test1.5 <- lm(
  effort_pctprize ~ ispart3,
  data = subset(
    sessions,
    isleader == 1 & prize > 1000 & Stepgroup.Label != "LotteryExperiment"
  )
)

stargazer(
  test1.5,
  type = "text",
  title = "Effort Difference between DSG and RSG",
  covariate.labels = c("DSG"),
  dep.var.labels = "Effort as Percent of the Prize"
)


# Appendix  B bullet point 3: main effect (between DSG and RSG) greater when controlling for prize amount levels
# Sample: isleader == 1 rows with prize > 1000; payofflevel enters as a factor.
test3 <- lm(
  effort_pctprize ~ ispart3 + ispart2leader + as.factor(payofflevel),
  data = subset(sessions, prize > 1000 & isleader == 1)
)

stargazer(
  test3,
  type = "text",
  title = "Effort by Decision Type",
  covariate.labels = c("DSG", "RSG", "Prize Levels"),
  dep.var.labels = "Effort as Percent of the Prize"
)

# Appendix  B bullet point 4: effect on NMV
# All four models share one sample: isleader == 1 rows with prize > 1000.
# They differ only in whether participant and prize-level factors are added.
nmv_leader_sample <- subset(sessions, isleader == 1 & prize > 1000)

bullet4a <- lm(nmv ~ ispart3 + ispart2, data = nmv_leader_sample)
bullet4b <- lm(
  nmv ~ ispart3 + ispart2 + as.factor(payofflevel),
  data = nmv_leader_sample
)
bullet4c <- lm(
  nmv ~ ispart3 + ispart2 + participantfe_factor,
  data = nmv_leader_sample
)
bullet4d <- lm(
  nmv ~ ispart3 + ispart2 + participantfe_factor + as.factor(payofflevel),
  data = nmv_leader_sample
)

# The factor terms are omitted from the table and reported as Yes/No rows.
stargazer(
  bullet4a, bullet4b, bullet4c, bullet4d,
  type = "latex",
  title = "Non-monetary Value of Winning (NMV) across sections. Base category is ICG",
  covariate.labels = c("Decision in DSG", "Decision in RSG"),
  dep.var.labels = "NMV",
  omit = c("participantfe_factor", "payofflevel"),
  add.lines = list(
    c("Participant FE", "No", "No", "Yes", "Yes"),
    c("Prize FE", "No", "Yes", "No", "Yes")
  )
)



















#################################################################################
#####  Appendix C: Alternative Explanations
#################################################################################

#####  Appendix C1: Is the DSG effect larger or smaller in "re-election" rounds?

# Leader decisions in the DSG with prize > 1000 and a non-missing effort.
# Stored under its own name: the previous name `subset` shadowed base::subset(),
# which this script calls right below.
dsg_leader_rounds <- sessions %>%
  filter(
    isleader == 1 & prize > 1000 & Stepgroup.Label == "ElectionsandContest" &
      !is.na(effort_pctprize)
  )
# Mean effort by isoddround (0 / 1)
mean_effort_dsg_by_round <- ddply(dsg_leader_rounds, "isoddround", dplyr::summarise, grp.mean = mean(effort_pctprize))
#  isoddround  grp.mean
#1          0 0.3764027
#2          1 0.3996857

### Is effort between different DSG round type (odd vs even rounds) statistically significant?
# Note: this regression sample additionally requires earlydrop == 0 and
# boughtTickets < 1001, which the group means above do not.
odd_round_test <- lm(effort_pctprize ~ isoddround , data = subset(sessions, isleader == 1 & prize > 1000 & earlydrop == 0 & boughtTickets <1001 & Stepgroup.Label == "ElectionsandContest"))

## Result mentioned in C1
summary(odd_round_test)
stargazer(odd_round_test, type = "text",
          title = "Effort by Round Type in DSG",
          covariate.labels = c("Odd round (re-election concern)"),
          dep.var.labels = "Effort as Percent of the Prize")


####################################################################
## APPENDIX C2. Wealth Effects vs. Non-Monetary Value to Winning   ##
#####################################################################


## Figure C1 shows the effect of being an elected leader on effort levels interacted with prize
# Sample: isleader == 1 rows outside "LotteryExperiment" (no prize filter).
nmv_model <- lm(effort_pctprize ~ ispart3*prize, data = subset(sessions, isleader == 1 & Stepgroup.Label != "LotteryExperiment" ))
summary(nmv_model)

# Distinct prize values present in the data, in increasing order
prizes <- as.numeric(levels(as.factor(sessions$prize)))

# Predicted effort with 90% confidence intervals at each prize value.
# Columns of p0 / p1 are: fit, lower bound, upper bound.
p0 <- predict(nmv_model, newdata = data.frame(ispart3 = 0, prize = prizes), interval = "confidence", level = 0.9)
p1 <- predict(nmv_model, newdata = data.frame(ispart3 = 1, prize = prizes), interval = "confidence", level = 0.9)

## Figure C.1
pdf(file = "Fig_NMV.pdf")
plot(prizes, p0[, 1], ylim = c(0.25, 0.7), col = "grey55", pch = 20, cex = 2, xlab = "Prize Value", ylab = "Predicted Effort as Percent of Prize")
points(prizes, p1[, 1], col = "black", pch = 20, cex = 2)
lines(prizes, p0[, 1], col = "grey55", lwd = 2)
lines(prizes, p1[, 1], col = "black", lwd = 2)

# Interval bars and shaded band for ispart3 = 0 (grey; "RSG" in the legend).
# segments() is vectorised, so one call draws a bar at every prize value.
segments(prizes, p0[, 2], prizes, p0[, 3], col = "grey55")

# Band outline: lower bounds left-to-right, then upper bounds right-to-left
x <- c(prizes, rev(prizes))
y <- c(p0[, 2], rev(p0[, 3]))
polygon(x, y, border = NA, col = rgb(140/255, 140/255, 140/255, alpha = 0.1))

# Interval bars and shaded band for ispart3 = 1 (black; "DSG" in the legend)
segments(prizes, p1[, 2], prizes, p1[, 3], col = "black")
y <- c(p1[, 2], rev(p1[, 3]))
polygon(x, y, border = NA, col = rgb(0, 0, 0, alpha = 0.2))

legend(2000, 0.7, c("DSG", "RSG"), col = c("black", "grey55"), pch = 19, lty = 1)
dev.off()
















###########################################################################
### APPENDIX D	Campaign Messages
###########################################################################

#setwd("~/Dropbox/Selection Experiments Harvard/Session Data/")
#messages <- read.csv("campaign messages/testdata_messages_2020_12_23.csv")

# Columns that identify one round of one session
round_keys <- c("sessionno", "Stepgroup.Label", "Stepgroup.Loop")

# Full outer merge with the leader ids, then keep only rows that carry a coded
# message (there are filler rows in the leaderids dataframe) and drop session
# 217, the cheater session: all its messages were coded but none are included
# in the analysis.
messages <- merge(messages, sessions_leaderids, by = round_keys, all = TRUE)
messages <- messages %>%
  filter(!is.na(msg_type_primary), sessionno != 217)

# 1 when the participant matches either leader_g1 or leader_g2, 0 otherwise
messages$isleader <- with(
  messages,
  ifelse(leader_g1 == Participant | leader_g2 == Participant, 1, 0)
)

# merging in the prize values from the sessions dataframe
# (one row per session / part / loop of "ElectionsandContest")
temp <- sessions %>%
  filter(Stepgroup.Label == "ElectionsandContest") %>%
  distinct(sessionno, Stepgroup.Label, Stepgroup.Loop, .keep_all = TRUE)

messages <- merge(
  x = messages,
  y = temp[, c(round_keys, "prize")],
  by = round_keys
)

# change all the nas to zero (applies to every column of messages)
messages[is.na(messages)] <- 0

# creating effort as a percent of prize
messages$effort_pctprize <- messages$boughtTickets / messages$prize


###
## Table D1
###

# left hand column, win rates by primary message type
# (winrate is the group mean of isleader, repeated on every row of the group)
winrates <- mutate(
  group_by(messages, msg_type_primary),
  winrate = mean(isleader, na.rm = TRUE)
)
table(winrates$winrate, winrates$msg_type_primary)

# right hand column, win rates for any message content
# (mean of isleader among messages flagged with the given content indicator)
summarise(filter(messages, msg_skill == 1), winrateany = mean(isleader))
summarise(filter(messages, msg_humor == 1), winrateany = mean(isleader))
summarise(filter(messages, msg_trackrecord == 1), winrateany = mean(isleader))
summarise(filter(messages, msg_inteam == 1), winrateany = mean(isleader))
summarise(filter(messages, msg_pastbad == 1), winrateany = mean(isleader))
summarise(filter(messages, msg_bidhigh == 1), winrateany = mean(isleader))
summarise(filter(messages, msg_strategy == 1), winrateany = mean(isleader))
summarise(filter(messages, msg_null == 1), winrateany = mean(isleader))
summarise(filter(messages, msg_bidlow == 1), winrateany = mean(isleader))
summarise(filter(messages, msg_speed == 1), winrateany = mean(isleader))
summarise(filter(messages, msg_outteam == 1), winrateany = mean(isleader))

# This gives the (N) values for the left hand side
table(winrates$msg_type_primary)

# This gives the (N) values for the right hand side
# (the first element of each dim() result is the row count)
dim(filter(messages, msg_skill == 1))
dim(filter(messages, msg_humor == 1))
dim(filter(messages, msg_trackrecord == 1))
dim(filter(messages, msg_inteam == 1))
dim(filter(messages, msg_pastbad == 1))
dim(filter(messages, msg_bidhigh == 1))
dim(filter(messages, msg_strategy == 1))
dim(filter(messages, msg_null == 1))
dim(filter(messages, msg_bidlow == 1))
dim(filter(messages, msg_speed == 1))
dim(filter(messages, msg_outteam == 1))

####
# Communication Effect, Table D.2
####

# Both models use all "ElectionsandContest" rows; m2 adds participant factors.
m1 <- lm(
  effort_pctprize ~ isleader + islosingcandidate,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest")
)
m2 <- lm(
  effort_pctprize ~ isleader + islosingcandidate + participantfe_factor,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest")
)

# Only the two indicators and the constant are shown; the participant factors
# are reported through the added "Participant FEs?" row.
stargazer(
  m1, m2,
  type = "latex",
  title = "Effect of communication on hypothetical effort levels",
  dep.var.labels = "",
  keep = c("isleader", "islosingcandidate", "Constant"),
  label = "tab:communication",
  covariate.labels = c("Is Leader", "Losing Candidate"),
  add.lines = list(c("Participant FEs?", "N", "Y"))
)



#######
#  Appendix E 
#######

# Demographic tables, E1 and E2 are at the front end of the R script

# Figure E.1

# Density plot of effort by section (DSG, RSG, ICG). Hypothetical decisions and low value rounds are excluded.
# Sample shared by the plot and the group means: isleader == 1, prize > 1000,
# everelecleader == 1.
ev_leader_decisions <- subset(
  sessions,
  isleader == 1 & prize > 1000 & everelecleader == 1
)

# Mean effort per Stepgroup.Label, computed on rows with a non-missing effort
mean_isleader_bypart_lo <- ddply(
  subset(ev_leader_decisions, !is.na(effort_pctprize)),
  "Stepgroup.Label",
  summarise,
  grp.mean = mean(effort_pctprize)
)

# Density curves with a rug at y = 0 and a dashed vertical line at each mean
ggplot(
  data = ev_leader_decisions,
  aes(x = effort_pctprize, color = Stepgroup.Label)
) +
  geom_density() +
  geom_rug(
    aes(x = effort_pctprize, color = Stepgroup.Label, y = 0),
    position = position_jitter(height = 0)
  ) +
  geom_vline(
    data = mean_isleader_bypart_lo,
    aes(xintercept = grp.mean, color = Stepgroup.Label),
    linetype = "dashed"
  ) +
  labs(color = '') +
  scale_color_manual(
    labels = c("DSG", "RSG", "ICG"),
    values = c("black", "grey55", "grey79")
  ) +
  xlab("Effort as Percent of Prize (Ev. Leaders Only)") +
  ylab("Density")

# Regression mentioned in text
#     "If we regress effort on an indicator for the DSG
#     and limit the sample to eventual leaders and use the RSG as the base category, we again
#     find that effort is higher in the DSG, with and without participant fixed effects (coefficients of 0.030 and 0.021; p > 0.05; table omitted for length).

# Sample: isleader == 1, everelecleader == 1, prize > 1000, excluding
# "LotteryExperiment". m2 adds participant factors.
# NOTE(review): if ispart1 flags "LotteryExperiment" rows it is constant in this
# sample and its coefficient cannot be estimated - confirm against the data prep.
m1 <- lm(
  effort_pctprize ~ ispart3 + ispart1,
  data = subset(
    sessions,
    isleader == 1 & everelecleader == 1 & prize > 1000 &
      Stepgroup.Label != "LotteryExperiment"
  )
)
m2 <- lm(
  effort_pctprize ~ ispart3 + ispart1 + participantfe_factor,
  data = subset(
    sessions,
    isleader == 1 & everelecleader == 1 & prize > 1000 &
      Stepgroup.Label != "LotteryExperiment"
  )
)

stargazer(
  m1, m2,
  type = "latex",
  title = "Comparing DSG to RSG",
  dep.var.labels = "",
  keep = c("ispart3", "ispart", "Constant"),
  label = "tab:apperegs",
  covariate.labels = c("DSG", "ICG"),
  add.lines = list(c("Participant FEs?", "N", "Y"))
)

# Numbers and regression mentioned in text
#   "Additionally, we can compare the actual leader decisions in the DSG..."

# Effort of rows with isleader == 0 and iscandidate == 1 among everelecleader == 1
# participants, in "ElectionsandContest" rounds with prize > 1000.
with(sessions, summary(effort_pctprize[which(
  isleader == 0 & iscandidate == 1 & everelecleader == 1 &
    Stepgroup.Label == "ElectionsandContest" & prize > 1000
)]))

# isleader coefficient among all everelecleader == 1 rows in those rounds
test <- lm(
  effort_pctprize ~ isleader,
  data = subset(sessions, everelecleader == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000)
)
summary(test)

# Same comparison restricted to rows with iscandidate == 1
test2 <- lm(
  effort_pctprize ~ isleader,
  data = subset(sessions, prize > 1000 & everelecleader == 1 & iscandidate == 1 & Stepgroup.Label == "ElectionsandContest")
)
summary(test2)





